{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from scipy.stats import f_oneway\n",
    "from sklearn.metrics import mean_absolute_error \n",
    "import altair as alt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw data; the path is relative to the notebook's working directory.\n",
    "df = pd.read_csv('StudentPerformanceFactors.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop every row that has at least one missing value.\n",
    "df = df.dropna()\n",
    "\n",
    "# Exclude rows whose Tutoring_Sessions equals this value.\n",
    "# NOTE(review): the reason for excluding exactly 8 sessions is not recorded here -- confirm.\n",
    "EXCLUDED_TUTORING_SESSIONS = 8\n",
    "\n",
    "# .copy() makes df an independent frame, so later column assignments\n",
    "# do not raise SettingWithCopyWarning.\n",
    "df = df[df['Tutoring_Sessions'] != EXCLUDED_TUTORING_SESSIONS].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of standard deviations around the mean used as the outlier cut-off.\n",
    "OUTLIER_STD_LIMIT = 3\n",
    "\n",
    "sum_stats = df['Exam_Score'].describe()\n",
    "\n",
    "lower_bound = sum_stats['mean'] - OUTLIER_STD_LIMIT * sum_stats['std']\n",
    "upper_bound = sum_stats['mean'] + OUTLIER_STD_LIMIT * sum_stats['std']\n",
    "\n",
    "# Keep rows whose Exam_Score lies strictly between the two bounds.\n",
    "# NOTE(review): the next cell truncates df, not filtered_df -- confirm which\n",
    "# frame the downstream cells are meant to use.\n",
    "filtered_df = df[(df['Exam_Score'] > lower_bound) & (df['Exam_Score'] < upper_bound)].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cap the working frame at the first MAX_ROWS rows. The slice is positional,\n",
    "# so the original index labels are kept (they are not renumbered).\n",
    "MAX_ROWS = 5000\n",
    "df = df.iloc[:MAX_ROWS].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the columns treated as categorical.\n",
    "categorical_columns = [\n",
    "    'Parental_Involvement',\n",
    "    'Access_to_Resources',\n",
    "    'Extracurricular_Activities',\n",
    "    'Motivation_Level',\n",
    "    'Internet_Access',\n",
    "    'Family_Income',\n",
    "    'Teacher_Quality',\n",
    "    'School_Type',\n",
    "    'Peer_Influence',\n",
    "    'Learning_Disabilities',\n",
    "    'Parental_Education_Level',\n",
    "    'Distance_from_Home',\n",
    "    'Gender',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
| \n", " | Hours_Studied | \n", "Attendance | \n", "Parental_Involvement | \n", "Access_to_Resources | \n", "Extracurricular_Activities | \n", "Sleep_Hours | \n", "Previous_Scores | \n", "Motivation_Level | \n", "Internet_Access | \n", "Tutoring_Sessions | \n", "Family_Income | \n", "Teacher_Quality | \n", "School_Type | \n", "Peer_Influence | \n", "Physical_Activity | \n", "Learning_Disabilities | \n", "Parental_Education_Level | \n", "Distance_from_Home | \n", "Gender | \n", "Exam_Score | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "23 | \n", "84 | \n", "Low | \n", "High | \n", "No | \n", "7 | \n", "73 | \n", "Low | \n", "Yes | \n", "0 | \n", "Low | \n", "Medium | \n", "Public | \n", "Positive | \n", "3 | \n", "No | \n", "High School | \n", "Near | \n", "Male | \n", "67 | \n", "
| 1 | \n", "19 | \n", "64 | \n", "Low | \n", "Medium | \n", "No | \n", "8 | \n", "59 | \n", "Low | \n", "Yes | \n", "2 | \n", "Medium | \n", "Medium | \n", "Public | \n", "Negative | \n", "4 | \n", "No | \n", "College | \n", "Moderate | \n", "Female | \n", "61 | \n", "
| 2 | \n", "24 | \n", "98 | \n", "Medium | \n", "Medium | \n", "Yes | \n", "7 | \n", "91 | \n", "Medium | \n", "Yes | \n", "2 | \n", "Medium | \n", "Medium | \n", "Public | \n", "Neutral | \n", "4 | \n", "No | \n", "Postgraduate | \n", "Near | \n", "Male | \n", "74 | \n", "
| 3 | \n", "29 | \n", "89 | \n", "Low | \n", "Medium | \n", "Yes | \n", "8 | \n", "98 | \n", "Medium | \n", "Yes | \n", "1 | \n", "Medium | \n", "Medium | \n", "Public | \n", "Negative | \n", "4 | \n", "No | \n", "High School | \n", "Moderate | \n", "Male | \n", "71 | \n", "
| 4 | \n", "19 | \n", "92 | \n", "Medium | \n", "Medium | \n", "Yes | \n", "6 | \n", "65 | \n", "Medium | \n", "Yes | \n", "3 | \n", "Medium | \n", "High | \n", "Public | \n", "Neutral | \n", "4 | \n", "No | \n", "College | \n", "Near | \n", "Female | \n", "70 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 5171 | \n", "12 | \n", "74 | \n", "Low | \n", "High | \n", "Yes | \n", "6 | \n", "55 | \n", "High | \n", "Yes | \n", "0 | \n", "Medium | \n", "Low | \n", "Public | \n", "Positive | \n", "3 | \n", "No | \n", "Postgraduate | \n", "Near | \n", "Male | \n", "63 | \n", "
| 5172 | \n", "18 | \n", "84 | \n", "High | \n", "Low | \n", "No | \n", "6 | \n", "64 | \n", "Medium | \n", "Yes | \n", "2 | \n", "Low | \n", "High | \n", "Public | \n", "Positive | \n", "2 | \n", "No | \n", "High School | \n", "Near | \n", "Female | \n", "67 | \n", "
| 5173 | \n", "14 | \n", "82 | \n", "Medium | \n", "Medium | \n", "Yes | \n", "4 | \n", "67 | \n", "Medium | \n", "Yes | \n", "1 | \n", "High | \n", "Medium | \n", "Public | \n", "Neutral | \n", "4 | \n", "No | \n", "High School | \n", "Moderate | \n", "Female | \n", "65 | \n", "
| 5174 | \n", "23 | \n", "76 | \n", "Medium | \n", "Medium | \n", "No | \n", "7 | \n", "66 | \n", "Medium | \n", "Yes | \n", "2 | \n", "Medium | \n", "Medium | \n", "Public | \n", "Neutral | \n", "3 | \n", "No | \n", "College | \n", "Near | \n", "Male | \n", "67 | \n", "
| 5176 | \n", "12 | \n", "69 | \n", "High | \n", "High | \n", "Yes | \n", "7 | \n", "53 | \n", "Low | \n", "Yes | \n", "2 | \n", "Low | \n", "High | \n", "Public | \n", "Neutral | \n", "3 | \n", "No | \n", "College | \n", "Near | \n", "Male | \n", "64 | \n", "
5000 rows × 20 columns
\n", "